/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*-------------------------------------------------------------------------
 *
 * jsonb_gin.c
 *   GIN support functions for jsonb
 *
 * Copyright (c) 2014-2015, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *    src/backend/utils/adt/jsonb_gin.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/gin.h"
#include "access/hash.h"
#include "catalog/pg_type.h"
#include "utils/builtins.h"
#include "utils/jsonb.h"

typedef struct PathHashStack
{
  uint32    hash;
  struct PathHashStack *parent;
} PathHashStack;

static Datum make_text_key(char flag, const char *str, int len);
static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key);

/*
 *
 * jsonb_ops GIN opclass support functions
 *
 */

Datum
gin_compare_jsonb(PG_FUNCTION_ARGS)
{
  text     *arg1 = PG_GETARG_TEXT_PP(0);
  text     *arg2 = PG_GETARG_TEXT_PP(1);
  int32   result;
  char     *a1p,
         *a2p;
  int     len1,
        len2;

  a1p = VARDATA_ANY(arg1);
  a2p = VARDATA_ANY(arg2);

  len1 = VARSIZE_ANY_EXHDR(arg1);
  len2 = VARSIZE_ANY_EXHDR(arg2);

  /* Compare text as bttextcmp does, but always using C collation */
  result = varstr_cmp(a1p, len1, a2p, len2);

  PG_FREE_IF_COPY(arg1, 0);
  PG_FREE_IF_COPY(arg2, 1);

  PG_RETURN_INT32(result);
}

Datum
gin_extract_jsonb(PG_FUNCTION_ARGS)
{
  Jsonb    *jb = (Jsonb *) PG_GETARG_JSONB(0);
  int32    *nentries = (int32 *) PG_GETARG_POINTER(1);
  int     total = 2 * JB_ROOT_COUNT(jb);
  JsonbIterator *it;
  JsonbValue  v;
  JsonbIteratorToken r;
  int     i = 0;
  Datum    *entries;

  /* If the root level is empty, we certainly have no keys */
  if (total == 0)
  {
    *nentries = 0;
    PG_RETURN_POINTER(NULL);
  }

  /* Otherwise, use 2 * root count as initial estimate of result size */
  entries = (Datum *) palloc(sizeof(Datum) * total);

  it = JsonbIteratorInit(&jb->root);

  while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
  {
    /* Since we recurse into the object, we might need more space */
    if (i >= total)
    {
      total *= 2;
      entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
    }

    switch (r)
    {
      case WJB_KEY:
        entries[i++] = make_scalar_key(&v, true);
        break;
      case WJB_ELEM:
        /* Pretend string array elements are keys, see jsonb.h */
        entries[i++] = make_scalar_key(&v, (v.type == jbvString));
        break;
      case WJB_VALUE:
        entries[i++] = make_scalar_key(&v, false);
        break;
      default:
        /* we can ignore structural items */
        break;
    }
  }

  *nentries = i;

  PG_RETURN_POINTER(entries);
}

Datum
gin_extract_jsonb_query(PG_FUNCTION_ARGS)
{
  int32    *nentries = (int32 *) PG_GETARG_POINTER(1);
  StrategyNumber strategy = PG_GETARG_UINT16(2);
  int32    *searchMode = (int32 *) PG_GETARG_POINTER(6);
  Datum    *entries;

  if (strategy == JsonbContainsStrategyNumber)
  {
    /* Query is a jsonb, so just apply gin_extract_jsonb... */
    entries = (Datum *)
      DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb,
                        PG_GETARG_DATUM(0),
                        PointerGetDatum(nentries)));
    /* ...although "contains {}" requires a full index scan */
    if (*nentries == 0)
      *searchMode = GIN_SEARCH_MODE_ALL;
  }
  else if (strategy == JsonbExistsStrategyNumber)
  {
    /* Query is a text string, which we treat as a key */
    text     *query = PG_GETARG_TEXT_PP(0);

    *nentries = 1;
    entries = (Datum *) palloc(sizeof(Datum));
    entries[0] = make_text_key(JGINFLAG_KEY,
                   VARDATA_ANY(query),
                   VARSIZE_ANY_EXHDR(query));
  }
  else if (strategy == JsonbExistsAnyStrategyNumber ||
       strategy == JsonbExistsAllStrategyNumber)
  {
    /* Query is a text array; each element is treated as a key */
    ArrayType  *query = PG_GETARG_ARRAYTYPE_P(0);
    Datum    *key_datums;
    bool     *key_nulls;
    int     key_count;
    int     i,
          j;

    deconstruct_array(query,
              TEXTOID, -1, false, 'i',
              &key_datums, &key_nulls, &key_count);

    entries = (Datum *) palloc(sizeof(Datum) * key_count);

    for (i = 0, j = 0; i < key_count; i++)
    {
      /* Nulls in the array are ignored */
      if (key_nulls[i])
        continue;
      entries[j++] = make_text_key(JGINFLAG_KEY,
                     VARDATA_ANY(key_datums[i]),
                     VARSIZE_ANY_EXHDR(key_datums[i]));
    }

    *nentries = j;
    /* ExistsAll with no keys should match everything */
    if (j == 0 && strategy == JsonbExistsAllStrategyNumber)
      *searchMode = GIN_SEARCH_MODE_ALL;
  }
  else
  {
    elog(ERROR, "unrecognized strategy number: %d", strategy);
    entries = NULL;     /* keep compiler quiet */
  }

  PG_RETURN_POINTER(entries);
}

Datum
gin_consistent_jsonb(PG_FUNCTION_ARGS)
{
  bool     *check = (bool *) PG_GETARG_POINTER(0);
  StrategyNumber strategy = PG_GETARG_UINT16(1);

  /* Jsonb     *query = PG_GETARG_JSONB(2); */
  int32   nkeys = PG_GETARG_INT32(3);

  /* Pointer     *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
  bool     *recheck = (bool *) PG_GETARG_POINTER(5);
  bool    res = true;
  int32   i;

  if (strategy == JsonbContainsStrategyNumber)
  {
    /*
     * We must always recheck, since we can't tell from the index whether
     * the positions of the matched items match the structure of the query
     * object.  (Even if we could, we'd also have to worry about hashed
     * keys and the index's failure to distinguish keys from string array
     * elements.)  However, the tuple certainly doesn't match unless it
     * contains all the query keys.
     */
    *recheck = true;
    for (i = 0; i < nkeys; i++)
    {
      if (!check[i])
      {
        res = false;
        break;
      }
    }
  }
  else if (strategy == JsonbExistsStrategyNumber)
  {
    /*
     * Although the key is certainly present in the index, we must recheck
     * because (1) the key might be hashed, and (2) the index match might
     * be for a key that's not at top level of the JSON object.  For (1),
     * we could look at the query key to see if it's hashed and not
     * recheck if not, but the index lacks enough info to tell about (2).
     */
    *recheck = true;
    res = true;
  }
  else if (strategy == JsonbExistsAnyStrategyNumber)
  {
    /* As for plain exists, we must recheck */
    *recheck = true;
    res = true;
  }
  else if (strategy == JsonbExistsAllStrategyNumber)
  {
    /* As for plain exists, we must recheck */
    *recheck = true;
    /* ... but unless all the keys are present, we can say "false" */
    for (i = 0; i < nkeys; i++)
    {
      if (!check[i])
      {
        res = false;
        break;
      }
    }
  }
  else
    elog(ERROR, "unrecognized strategy number: %d", strategy);

  PG_RETURN_BOOL(res);
}

Datum
gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
{
  GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
  StrategyNumber strategy = PG_GETARG_UINT16(1);

  /* Jsonb     *query = PG_GETARG_JSONB(2); */
  int32   nkeys = PG_GETARG_INT32(3);

  /* Pointer     *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
  GinTernaryValue res = GIN_MAYBE;
  int32   i;

  /*
   * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
   * corresponds to always forcing recheck in the regular consistent
   * function, for the reasons listed there.
   */
  if (strategy == JsonbContainsStrategyNumber ||
    strategy == JsonbExistsAllStrategyNumber)
  {
    /* All extracted keys must be present */
    for (i = 0; i < nkeys; i++)
    {
      if (check[i] == GIN_FALSE)
      {
        res = GIN_FALSE;
        break;
      }
    }
  }
  else if (strategy == JsonbExistsStrategyNumber ||
       strategy == JsonbExistsAnyStrategyNumber)
  {
    /* At least one extracted key must be present */
    res = GIN_FALSE;
    for (i = 0; i < nkeys; i++)
    {
      if (check[i] == GIN_TRUE ||
        check[i] == GIN_MAYBE)
      {
        res = GIN_MAYBE;
        break;
      }
    }
  }
  else
    elog(ERROR, "unrecognized strategy number: %d", strategy);

  PG_RETURN_GIN_TERNARY_VALUE(res);
}

/*
 *
 * jsonb_path_ops GIN opclass support functions
 *
 * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON
 * value; but the JSON key(s) leading to each value are also included in its
 * hash computation.  This means we can only support containment queries,
 * but the index can distinguish, for example, {"foo": 42} from {"bar": 42}
 * since different hashes will be generated.
 *
 */

Datum
gin_extract_jsonb_path(PG_FUNCTION_ARGS)
{
  Jsonb    *jb = PG_GETARG_JSONB(0);
  int32    *nentries = (int32 *) PG_GETARG_POINTER(1);
  int     total = 2 * JB_ROOT_COUNT(jb);
  JsonbIterator *it;
  JsonbValue  v;
  JsonbIteratorToken r;
  PathHashStack tail;
  PathHashStack *stack;
  int     i = 0;
  Datum    *entries;

  /* If the root level is empty, we certainly have no keys */
  if (total == 0)
  {
    *nentries = 0;
    PG_RETURN_POINTER(NULL);
  }

  /* Otherwise, use 2 * root count as initial estimate of result size */
  entries = (Datum *) palloc(sizeof(Datum) * total);

  /* We keep a stack of partial hashes corresponding to parent key levels */
  tail.parent = NULL;
  tail.hash = 0;
  stack = &tail;

  it = JsonbIteratorInit(&jb->root);

  while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
  {
    PathHashStack *parent;

    /* Since we recurse into the object, we might need more space */
    if (i >= total)
    {
      total *= 2;
      entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
    }

    switch (r)
    {
      case WJB_BEGIN_ARRAY:
      case WJB_BEGIN_OBJECT:
        /* Push a stack level for this object */
        parent = stack;
        stack = (PathHashStack *) palloc(sizeof(PathHashStack));

        if (parent->parent)
        {
          /*
           * We pass forward hashes from previous container nesting
           * levels so that nested arrays with an outermost nested
           * object will have element hashes mixed with the
           * outermost key.  It's also somewhat useful to have
           * nested objects' innermost values have hashes that are a
           * function of not just their own key, but outer keys too.
           *
           * Nesting an array within another array will not alter
           * innermost scalar element hash values, but that seems
           * inconsequential.
           */
          stack->hash = parent->hash;
        }
        else
        {
          /*
           * At the outermost level, initialize hash with container
           * type proxy value.  Note that this makes JB_FARRAY and
           * JB_FOBJECT part of the on-disk representation, but they
           * are that in the base jsonb object storage already.
           */
          stack->hash = (r == WJB_BEGIN_ARRAY) ? JB_FARRAY : JB_FOBJECT;
        }
        stack->parent = parent;
        break;
      case WJB_KEY:
        /* initialize hash from parent */
        stack->hash = stack->parent->hash;
        /* and mix in this key */
        JsonbHashScalarValue(&v, &stack->hash);
        /* hash is now ready to incorporate the value */
        break;
      case WJB_ELEM:
        /* array elements use parent hash mixed with element's hash */
        stack->hash = stack->parent->hash;
        /* FALL THRU */
      case WJB_VALUE:
        /* mix the element or value's hash into the prepared hash */
        JsonbHashScalarValue(&v, &stack->hash);
        /* and emit an index entry */
        entries[i++] = UInt32GetDatum(stack->hash);
        /* Note: we assume we'll see KEY before another VALUE */
        break;
      case WJB_END_ARRAY:
      case WJB_END_OBJECT:
        /* Pop the stack */
        parent = stack->parent;
        pfree(stack);
        stack = parent;
        break;
      default:
        elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
    }
  }

  *nentries = i;

  PG_RETURN_POINTER(entries);
}

Datum
gin_extract_jsonb_query_path(PG_FUNCTION_ARGS)
{
  int32    *nentries = (int32 *) PG_GETARG_POINTER(1);
  StrategyNumber strategy = PG_GETARG_UINT16(2);
  int32    *searchMode = (int32 *) PG_GETARG_POINTER(6);
  Datum    *entries;

  if (strategy != JsonbContainsStrategyNumber)
    elog(ERROR, "unrecognized strategy number: %d", strategy);

  /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */
  entries = (Datum *)
    DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path,
                      PG_GETARG_DATUM(0),
                      PointerGetDatum(nentries)));

  /* ... although "contains {}" requires a full index scan */
  if (*nentries == 0)
    *searchMode = GIN_SEARCH_MODE_ALL;

  PG_RETURN_POINTER(entries);
}

Datum
gin_consistent_jsonb_path(PG_FUNCTION_ARGS)
{
  bool     *check = (bool *) PG_GETARG_POINTER(0);
  StrategyNumber strategy = PG_GETARG_UINT16(1);

  /* Jsonb     *query = PG_GETARG_JSONB(2); */
  int32   nkeys = PG_GETARG_INT32(3);

  /* Pointer     *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
  bool     *recheck = (bool *) PG_GETARG_POINTER(5);
  bool    res = true;
  int32   i;

  if (strategy != JsonbContainsStrategyNumber)
    elog(ERROR, "unrecognized strategy number: %d", strategy);

  /*
   * jsonb_path_ops is necessarily lossy, not only because of hash
   * collisions but also because it doesn't preserve complete information
   * about the structure of the JSON object.  Besides, there are some
   * special rules around the containment of raw scalars in arrays that are
   * not handled here.  So we must always recheck a match.  However, if not
   * all of the keys are present, the tuple certainly doesn't match.
   */
  *recheck = true;
  for (i = 0; i < nkeys; i++)
  {
    if (!check[i])
    {
      res = false;
      break;
    }
  }

  PG_RETURN_BOOL(res);
}

Datum
gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS)
{
  GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
  StrategyNumber strategy = PG_GETARG_UINT16(1);

  /* Jsonb     *query = PG_GETARG_JSONB(2); */
  int32   nkeys = PG_GETARG_INT32(3);

  /* Pointer     *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
  GinTernaryValue res = GIN_MAYBE;
  int32   i;

  if (strategy != JsonbContainsStrategyNumber)
    elog(ERROR, "unrecognized strategy number: %d", strategy);

  /*
   * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
   * corresponds to always forcing recheck in the regular consistent
   * function, for the reasons listed there.
   */
  for (i = 0; i < nkeys; i++)
  {
    if (check[i] == GIN_FALSE)
    {
      res = GIN_FALSE;
      break;
    }
  }

  PG_RETURN_GIN_TERNARY_VALUE(res);
}

/*
 * Construct a jsonb_ops GIN key from a flag byte and a textual representation
 * (which need not be null-terminated).  This function is responsible
 * for hashing overlength text representations; it will add the
 * JGINFLAG_HASHED bit to the flag value if it does that.
 */
static Datum
make_text_key(char flag, const char *str, int len)
{
  text     *item;
  char    hashbuf[10];

  if (len > JGIN_MAXLENGTH)
  {
    uint32    hashval;

    hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len));
    snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval);
    str = hashbuf;
    len = 8;
    flag |= JGINFLAG_HASHED;
  }

  /*
   * Now build the text Datum.  For simplicity we build a 4-byte-header
   * varlena text Datum here, but we expect it will get converted to short
   * header format when stored in the index.
   */
  item = (text *) palloc(VARHDRSZ + len + 1);
  SET_VARSIZE(item, VARHDRSZ + len + 1);

  *VARDATA(item) = flag;

  memcpy(VARDATA(item) + 1, str, len);

  return PointerGetDatum(item);
}

/*
 * Create a textual representation of a JsonbValue that will serve as a GIN
 * key in a jsonb_ops index.  is_key is true if the JsonbValue is a key,
 * or if it is a string array element (since we pretend those are keys,
 * see jsonb.h).
 */
static Datum
make_scalar_key(const JsonbValue *scalarVal, bool is_key)
{
  Datum   item;
  char     *cstr;

  switch (scalarVal->type)
  {
    case jbvNull:
      Assert(!is_key);
      item = make_text_key(JGINFLAG_NULL, "", 0);
      break;
    case jbvBool:
      Assert(!is_key);
      item = make_text_key(JGINFLAG_BOOL,
                 scalarVal->val.boolean ? "t" : "f", 1);
      break;
    case jbvNumeric:
      Assert(!is_key);

      /*
       * A normalized textual representation, free of trailing zeroes,
       * is required so that numerically equal values will produce equal
       * strings.
       *
       * It isn't ideal that numerics are stored in a relatively bulky
       * textual format.  However, it's a notationally convenient way of
       * storing a "union" type in the GIN B-Tree, and indexing Jsonb
       * strings takes precedence.
       */
      cstr = numeric_normalize(scalarVal->val.numeric);
      item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr));
      pfree(cstr);
      break;
    case jbvString:
      item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR,
                 scalarVal->val.string.val,
                 scalarVal->val.string.len);
      break;
    default:
      elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type);
      item = 0;     /* keep compiler quiet */
      break;
  }

  return item;
}
